{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "D:\\FinRL-Meta\n"
     ]
    }
   ],
   "source": [
    "%cd /FinRL-Meta/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Episode [100/1300]\tAverage Shortfall for Agent1: $1,169,436.75\n",
      "Episode [100/1300]\tAverage Shortfall for Agent2: $1,183,558.09\n",
      "Episode [200/1300]\tAverage Shortfall for Agent1: $1,281,179.21\n",
      "Episode [200/1300]\tAverage Shortfall for Agent2: $1,281,092.59\n",
      "Episode [300/1300]\tAverage Shortfall for Agent1: $1,279,640.18\n",
      "Episode [300/1300]\tAverage Shortfall for Agent2: $1,280,658.77\n",
      "Episode [400/1300]\tAverage Shortfall for Agent1: $989,478.78\n",
      "Episode [400/1300]\tAverage Shortfall for Agent2: $1,065,107.85\n",
      "Episode [500/1300]\tAverage Shortfall for Agent1: $340,900.76\n",
      "Episode [500/1300]\tAverage Shortfall for Agent2: $339,859.99\n",
      "Episode [600/1300]\tAverage Shortfall for Agent1: $349,452.13\n",
      "Episode [600/1300]\tAverage Shortfall for Agent2: $352,791.78\n",
      "Episode [700/1300]\tAverage Shortfall for Agent1: $302,789.39\n",
      "Episode [700/1300]\tAverage Shortfall for Agent2: $296,596.55\n",
      "Episode [800/1300]\tAverage Shortfall for Agent1: $305,151.05\n",
      "Episode [800/1300]\tAverage Shortfall for Agent2: $301,542.19\n",
      "Episode [900/1300]\tAverage Shortfall for Agent1: $343,508.22\n",
      "Episode [900/1300]\tAverage Shortfall for Agent2: $342,052.92\n",
      "Episode [1000/1300]\tAverage Shortfall for Agent1: $318,731.56\n",
      "Episode [1000/1300]\tAverage Shortfall for Agent2: $317,495.71\n",
      "Episode [1100/1300]\tAverage Shortfall for Agent1: $329,135.85\n",
      "Episode [1100/1300]\tAverage Shortfall for Agent2: $333,255.71\n",
      "Episode [1200/1300]\tAverage Shortfall for Agent1: $300,993.44\n",
      "Episode [1200/1300]\tAverage Shortfall for Agent2: $301,320.57\n",
      "Episode [1300/1300]\tAverage Shortfall for Agent1: $294,413.69\n",
      "Episode [1300/1300]\tAverage Shortfall for Agent2: $292,937.04\n",
      "\n",
      "Average Implementation Shortfall for Agent1: $584,985.46 \n",
      "\n",
      "\n",
      "Average Implementation Shortfall for Agent2: $591,405.37 \n",
      "\n"
     ]
    }
   ],
   "source": [
    "'''Source: https://github.com/AI4Finance-Foundation/Liquidation-Analysis-using-Multi-Agent-Reinforcement-Learning-ICML-2019/blob/master/Model_training.ipynb'''\n",
    "'''Paper: Multi-agent reinforcement learning for liquidation strategy analysis accepted by ICML 2019 AI in Finance: Applications and Infrastructure for Multi-Agent Learning. (https://arxiv.org/abs/1906.11046)'''\n",
    "\n",
    "import numpy as np\n",
    "from collections import deque\n",
    "import meta.env_execution_optimizing.liquidation.env_execution_optimizing as env\n",
    "from meta.env_execution_optimizing.liquidation import utils\n",
    "from meta.env_execution_optimizing.liquidation.ddpg_agent import Agent\n",
    "\n",
    "# Get the default financial and AC Model parameters\n",
    "financial_params, ac_params = utils.get_env_param()\n",
    "\n",
    "# Create simulation environment.\n",
    "# NOTE: this rebinds `env` from the imported module to the environment instance, so the\n",
    "# import at the top of this cell must run again before this line can be re-executed.\n",
    "env = env.MarketEnvironment()\n",
    "\n",
    "# Initialize Feed-forward DNNs for Actor and Critic models.\n",
    "# Both agents are built with the same state/action sizes and differ only in their random seed.\n",
    "agent1 = Agent(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(),random_seed = 1225)\n",
    "agent2 = Agent(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(),random_seed = 108)\n",
    "# Set the liquidation time\n",
    "lqt = 60\n",
    "\n",
    "# Set the number of trades\n",
    "n_trades = 60\n",
    "\n",
    "# Set trader's risk aversion\n",
    "tr1 = 1e-6\n",
    "tr2 = 1e-6\n",
    "\n",
    "# Set the number of episodes to run the simulation\n",
    "episodes = 1300\n",
    "\n",
    "# Number of episodes between progress reports; also the window of the rolling averages,\n",
    "# so every report covers exactly the episodes since the previous one.\n",
    "report_every = 100\n",
    "\n",
    "# One [agent1_mean, agent2_mean] pair per progress report\n",
    "shortfall_list = []\n",
    "\n",
    "# Per-episode shortfalls are collected in plain lists and converted to arrays once after\n",
    "# training; np.append returns a fresh copy on every call, which makes growth quadratic.\n",
    "shortfall_hist1 = []\n",
    "shortfall_hist2 = []\n",
    "shortfall_deque1 = deque(maxlen=report_every)\n",
    "shortfall_deque2 = deque(maxlen=report_every)\n",
    "\n",
    "for episode in range(episodes):\n",
    "    # Reset the environment; the episode index doubles as the seed\n",
    "    cur_state = env.reset(seed = episode, liquid_time = lqt, num_trades = n_trades, lamb1 = tr1,lamb2 = tr2)\n",
    "\n",
    "    # set the environment to make transactions\n",
    "    env.start_transactions()\n",
    "\n",
    "    for i in range(n_trades + 1):\n",
    "\n",
    "        # Each agent acts on its own view of the joint state: agent1's view drops\n",
    "        # element 8 and agent2's view drops element 7.\n",
    "        # NOTE(review): presumably those entries belong to the other agent - confirm against MarketEnvironment.\n",
    "        cur_state1 = np.delete(cur_state,8)\n",
    "        cur_state2 = np.delete(cur_state,7)\n",
    "\n",
    "        # Predict the best action for the current state.\n",
    "        action1 = agent1.act(cur_state1, add_noise = True)\n",
    "        action2 = agent2.act(cur_state2, add_noise = True)\n",
    "\n",
    "        # Action is performed and new state, reward, info are received.\n",
    "        new_state, reward1, reward2, done1, done2, info = env.step(action1,action2)\n",
    "\n",
    "        # current state, action, reward, new state are stored in the experience replay\n",
    "        new_state1 = np.delete(new_state,8)\n",
    "        new_state2 = np.delete(new_state,7)\n",
    "        agent1.step(cur_state1, action1, reward1, new_state1, done1)\n",
    "        agent2.step(cur_state2, action2, reward2, new_state2, done2)\n",
    "\n",
    "        # roll over new state\n",
    "        cur_state = new_state\n",
    "\n",
    "        # The episode ends once both agents are flagged done; record their shortfalls.\n",
    "        if info.done1 and info.done2:\n",
    "            shortfall_hist1.append(info.implementation_shortfall1)\n",
    "            shortfall_deque1.append(info.implementation_shortfall1)\n",
    "\n",
    "            shortfall_hist2.append(info.implementation_shortfall2)\n",
    "            shortfall_deque2.append(info.implementation_shortfall2)\n",
    "            break\n",
    "\n",
    "    if (episode + 1) % report_every == 0: # print average shortfall over the last report_every episodes\n",
    "        # Compute each rolling mean once and reuse it for both the printout and the log.\n",
    "        mean_shortfall1 = np.mean(shortfall_deque1)\n",
    "        mean_shortfall2 = np.mean(shortfall_deque2)\n",
    "        print('\\rEpisode [{}/{}]\\tAverage Shortfall for Agent1: ${:,.2f}'.format(episode + 1, episodes, mean_shortfall1))\n",
    "        print('\\rEpisode [{}/{}]\\tAverage Shortfall for Agent2: ${:,.2f}'.format(episode + 1, episodes, mean_shortfall2))\n",
    "        shortfall_list.append([mean_shortfall1, mean_shortfall2])\n",
    "\n",
    "# Convert the histories to flat float arrays, the same form np.append used to produce.\n",
    "shortfall_hist1 = np.asarray(shortfall_hist1, dtype=float).ravel()\n",
    "shortfall_hist2 = np.asarray(shortfall_hist2, dtype=float).ravel()\n",
    "\n",
    "print('\\nAverage Implementation Shortfall for Agent1: ${:,.2f} \\n'.format(np.mean(shortfall_hist1)))\n",
    "print('\\nAverage Implementation Shortfall for Agent2: ${:,.2f} \\n'.format(np.mean(shortfall_hist2)))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
